t1 <- Sys.time()
#source("../../hemp_busco_chrom/Rfunctions.R")
#source("~/gits/ViningLab_code/busco_lineplot/Rfunctions.R")
source("Rfunctions.R")
my_min_BUSCO <- 2
paste("min_BUSCO:", my_min_BUSCO)
[1] “min_BUSCO: 2”
#busc <- read_busco("DM_6_busco/full_table.tsv")
busc <- read_busco("DM6_full_table.tsv")
busc$Sequence <- paste("DM6:", busc$Sequence, sep = "")
busc[1:3, ]
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 1 2at71240 Complete DM6:chr04 59375834 59427686 - 5614.7 4099
## 2 10at71240 Complete DM6:chr02 42824870 42847588 - 6723.1 3829
## 3 17at71240 Complete DM6:chr02 8847920 8895418 - 6694.3 3910
p <- busco_bar(busc, max_copy = NULL)
#p <- busco_bar(busc, max_copy = 3)
p <- p + ggtitle("BUSCO: DM6")
p
Sort by chromosome into a list
my_bl <- make_known(busc)
lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 4 86at71240 Complete DM6:chr01 85678541 85699353 + 5229.1 3121
## 6 224at71240 Complete DM6:chr01 74708259 74728652 - 2768.8 1841
##
## $`DM6:chr02`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 2 10at71240 Complete DM6:chr02 42824870 42847588 - 6723.1 3829
## 3 17at71240 Complete DM6:chr02 8847920 8895418 - 6694.3 3910
##
## $`DM6:chr03`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 7 265at71240 Complete DM6:chr03 60542709 60565715 + 2667.9 1616
## 11 383at71240 Complete DM6:chr03 12561124 12575718 - 2711.2 1679
#busc <- read_busco("ATL_busco/full_table.tsv")
busc <- read_busco("atlantic_full_table.tsv")
busc$Sequence <- paste("ATL:", busc$Sequence, sep = "")
busc[1:3, ]
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 1 2at71240 Duplicated ATL:chr04_0 24891993 24938026 - 5601.4 4110
## 2 2at71240 Duplicated ATL:chr04_2 15369596 15415265 - 5604.0 4121
## 3 2at71240 Duplicated ATL:chr04_3 19561433 19608034 - 5600.6 4114
p <- busco_bar(busc)
#p <- busco_bar(busc, max_copy = 6)
p <- p + ggtitle("BUSCO: Atlantic")
p
lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 4 86at71240 Complete DM6:chr01 85678541 85699353 + 5229.1 3121
## 6 224at71240 Complete DM6:chr01 74708259 74728652 - 2768.8 1841
##
## $`DM6:chr02`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 2 10at71240 Complete DM6:chr02 42824870 42847588 - 6723.1 3829
## 3 17at71240 Complete DM6:chr02 8847920 8895418 - 6694.3 3910
##
## $`DM6:chr03`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 7 265at71240 Complete DM6:chr03 60542709 60565715 + 2667.9 1616
## 11 383at71240 Complete DM6:chr03 12561124 12575718 - 2711.2 1679
my_bl <- add_unknown(my_bl, busc)
lapply( my_bl[1:3], function(x){x[1:2, ]} )
## $`DM6:chr01`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 4 86at71240 Complete DM6:chr01 85678541 85699353 + 5229.1 3121
## 6 224at71240 Complete DM6:chr01 74708259 74728652 - 2768.8 1841
## ATL:chr01_4 ATL:chr01_1 ATL:chr01_0 ATL:chr01_2 ATL:chr01_3 ATL:Unplaced
## 4 65848858 NA NA NA NA NA
## 6 NA 50440194 NA NA NA NA
## ATL:chr03_1
## 4 NA
## 6 NA
##
## $`DM6:chr02`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 2 10at71240 Complete DM6:chr02 42824870 42847588 - 6723.1 3829
## 3 17at71240 Complete DM6:chr02 8847920 8895418 - 6694.3 3910
## ATL:chr02_1 ATL:chr02_2 ATL:chr02_0 ATL:chr02_3
## 2 36328988 NA NA NA
## 3 3993300 NA NA NA
##
## $`DM6:chr03`
## Busco_id Status Sequence Gene_Start Gene_End Strand Score Length
## 7 265at71240 Complete DM6:chr03 60542709 60565715 + 2667.9 1616
## 11 383at71240 Complete DM6:chr03 12561124 12575718 - 2711.2 1679
## ATL:chr03_1 ATL:chr03_3 ATL:chr03_2 ATL:chr07_1 ATL:chr03_0 ATL:chr03_4
## 7 48224495 NA NA NA NA NA
## 11 NA 14358968 NA NA NA NA
# gg_line_map( my_bl[[1]][, c(4, 9:ncol(my_bl[[1]]))], rect = TRUE, lalpha = 0.08 )
my_bl <- position_BUSCOS(my_bl, min_BUSCO = my_min_BUSCO, sort_by = "max_busco")
my_bl_atl <- my_bl
#busc <- read_busco("DM_6_busco/full_table.tsv")
busc <- read_busco("DM6_full_table.tsv")
busc$Sequence <- paste("DM6:", busc$Sequence, sep = "")
my_bl <- make_known(busc)
#busc <- read_busco("ATL_busco/full_table.tsv")
#busc <- read_busco("castle_russet/full_table.tsv")
busc <- read_busco("castle-russet_full_table.tsv")
busc$Sequence <- paste("CR:", busc$Sequence, sep = "")
#busc[1:3, ]
p <- busco_bar(busc)
#p <- busco_bar(busc, max_copy = 6)
#p <- p + ggtitle("BUSCO: Atlantic")
p <- p + ggtitle("BUSCO: Castle-Russet")
p
#lapply( my_bl[1:3], function(x){x[1:2, ]} )
my_bl <- add_unknown(my_bl, busc)
#lapply( my_bl[1:3], function(x){x[1:2, ]} )
#my_bl <- position_BUSCOS(my_bl, sort_by = "max_pos")
#
my_bl <- position_BUSCOS(my_bl, min_BUSCO = my_min_BUSCO, sort_by = "max_busco")
#for(i in 1:length(busco_l)){
for(i in 1:length(my_bl)){
tmp <- my_bl[[i]]
# colnames(tmp)[9:ncol(tmp)] <- paste("CR:", colnames(tmp)[9:ncol(tmp)], sep = "")
#tmp[1:3, ]
rownames(tmp) <- tmp$Busco_id
tmp <- tmp[ , c(4, 9:ncol(tmp))]
#colnames(tmp)[ colnames(tmp) == "Gene_Start" ] <- paste("DM6:", names(my_bl)[i], sep = "")
colnames(tmp)[ colnames(tmp) == "Gene_Start" ] <- names(my_bl)[i]
#tmp[1:3, ]
tmp2 <- my_bl_atl[[i]]
# colnames(tmp2)[9:ncol(tmp2)] <- paste("ATL:", colnames(tmp2)[9:ncol(tmp2)], sep = "")
tmp <- cbind( tmp2[ , ncol(tmp2):9 ], tmp )
#p <- gg_line_map(tmp)
p <- gg_line_map(tmp, rect = TRUE, palpha = 1, size = 2.2,
lalpha = 0.1, linewidth = 1.4, line_na_rm = TRUE)
p <- p + ggtitle( paste("Chromosome:", names(my_bl)[i]) )
print(p)
}
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
t99 <- Sys.time()
t99 - t1
## Time difference of 16.12624 secs